library(haven)
library(coefplot)
library(stargazer)
library(broom)
library(tidyverse)

# Read the raw Stata (.dta) file for each of the four studies.
study1 <- read_dta(file = "Study 3/processed_data/raw dat/study1.dta")
study2 <- read_dta(file = "Study 3/processed_data/raw dat/study2.dta")
study3 <- read_dta(file = "Study 3/processed_data/raw dat/study3.dta")
study4 <- read_dta(file = "Study 3/processed_data/raw dat/study4.dta")

# Standard error of the mean: sd(x) / sqrt(n).
#
# x      Numeric vector.
# na.rm  Passed to sd(); when TRUE (the default) missing values are dropped
#        before the standard deviation is computed.
#
# Returns a single numeric value. n is the number of NON-missing values:
# length(!is.na(x)) would just be length(x), which counts the NAs too and
# understates the standard error whenever x has missing values. With
# na.rm = FALSE and NAs present, sd() is NA and so is the result.
se <- function(x, na.rm = TRUE) {
  n_obs <- sum(!is.na(x))
  sd(x, na.rm = na.rm) / sqrt(n_obs)
}

# Print a text-format stargazer table for study1; the data are converted to a
# base data.frame before being handed to stargazer().
study1 |>
  as.data.frame() |>
  stargazer(type = "text")

## Finding RR Conditional Means


## Timing indicator for study 1: 1 = distal, 2 = proximal, 3 = post.
## The conditions are checked from highest to lowest precedence: a non-missing
## implicit_post wins over implicit_nolag, which wins over implicit_lag.
## Rows where all three are missing get NA.
study1 <- study1 |>
  mutate(
    timing = case_when(
      !is.na(implicit_post) ~ 3,
      !is.na(implicit_nolag) ~ 2,
      !is.na(implicit_lag) ~ 1
    )
  )

# Study 1: mean and standard error of symrac01, hc_index and leader_index for
# each timing value, plus the number of rows per group. Rows without a timing
# code are dropped first. Output columns are named <variable>mean and
# <variable>se.
s1_timing <- study1 |>
  filter(!is.na(timing)) |>
  summarize(
    study = 1,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = timing
  )

# Study 2: every row gets timing code 3, then the mean and standard error of
# each outcome (and the row count) are computed per timing value.
study2 <- mutate(study2, timing = 3)

s2_timing <- study2 |>
  filter(!is.na(timing)) |>
  summarize(
    study = 2,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = timing
  )

# Study 3: every row gets timing code 3, then the mean and standard error of
# each outcome (and the row count) are computed per timing value.
study3 <- mutate(study3, timing = 3)

s3_timing <- study3 |>
  filter(!is.na(timing)) |>
  summarize(
    study = 3,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = timing
  )

# Study 4 cleaning
## Timing indicator: 1 = distal, 2 = proximal, 3 = post. Conditions are listed
## from highest to lowest precedence (post, then proximal, then distal); rows
## with all three implicit_* columns missing get NA.
study4 <- study4 |>
  mutate(
    timing = case_when(
      !is.na(implicit_post) ~ 3,
      !is.na(implicit_proximal) ~ 2,
      !is.na(implicit_distal) ~ 1
    )
  )

# Study 4: per-timing means, standard errors and row counts.
# The hc_index* columns are computed from support_social_welfare01 so that the
# result has the same column names as the study 1-3 summaries.
s4_timing <- filter(study4, !is.na(timing)) |>
  summarize(
    study            = 4,
    symrac01mean     = mean(symrac01, na.rm = TRUE),
    symrac01se       = se(symrac01, na.rm = TRUE),
    hc_indexmean     = mean(support_social_welfare01, na.rm = TRUE),
    hc_indexse       = se(support_social_welfare01, na.rm = TRUE),
    leader_indexmean = mean(leader_index, na.rm = TRUE),
    leader_indexse   = se(leader_index, na.rm = TRUE),
    n                = n(),
    .by = timing
  )

## Create dataframe with means

# Stack the four per-study timing summaries, then convert the numeric timing
# code and the study number to factors.
grouped_means <- bind_rows(s1_timing, s2_timing, s3_timing, s4_timing) |>
  mutate(
    timing = factor(
      timing,
      levels = c(1, 2, 3),
      labels = c("Distal", "Proximal", "Post")
    ),
    study = factor(study)
  )

#save(grouped_means, file = "Study 3/processed_data/grouped_means.rda")

## Creating Dataframe with DV Means

# Long-format table of outcome means: one copy of grouped_means per dependent
# variable, each with generic `means` / `se` columns and a `dv` code
# (1 = Health Care Index, 2 = Leader Index).

# Leader index rows: drop the hc_* columns, keep leader_* as means/se.
grouped_means_l <- grouped_means |>
  select(-contains("hc")) |>
  rename(
    means = leader_indexmean,
    se = leader_indexse
  ) |>
  mutate(dv = 2)

# Health care index rows: drop the leader_* columns, keep hc_* as means/se.
grouped_means_h <- grouped_means |>
  select(-contains("leader")) |>
  rename(
    means = hc_indexmean,
    se = hc_indexse
  ) |>
  mutate(dv = 1)

# Stack both and label the dependent variable.
dvmeans <- rbind(grouped_means_h, grouped_means_l) |>
  mutate(
    dv = factor(
      dv,
      levels = c(1, 2),
      labels = c("Health Care Index", "Leader Index")
    )
  )

#save(dvmeans, file = "Study 3/processed_data/dvmeans.rda")

## Treatment Dataframe

# Study 1 treatment groups.
# Rename the lag / no-lag columns to the distal / proximal names used in
# study 4.
study1 <- rename(
  study1,
  implicit_distal = implicit_lag,
  implicit_proximal = implicit_nolag
)

# Mean, standard error and row count of each outcome for every
# three_conditions x timing combination (rows with missing timing are kept).
s1_treatment_timing <- study1 |>
  summarise(
    study = 1,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing)
  )

# Study 2 treatment groups: mean, standard error and row count of each outcome
# for every three_conditions x timing combination.
s2_treatment_timing <- study2 |>
  summarise(
    study = 2,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing)
  )

# Study 3 treatment groups: mean, standard error and row count of each outcome
# for every three_conditions x timing combination.
s3_treatment_timing <- study3 |>
  summarize(
    study = 3,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing)
  )

# Study 4 treatment groups.
# support_social_welfare01 is renamed to hc_index so study 4 can be summarised
# with the same column names as studies 1-3.
study4 <- rename(study4, hc_index = support_social_welfare01)

# Mean, standard error and row count of each outcome for every
# three_conditions x timing combination.
s4_treatment_timing <- study4 |>
  summarize(
    study = 4,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing)
  )

# Stack the per-study treatment x timing summaries and convert the numeric
# codes for timing, condition and study to labelled factors.
treatment <- rbind(
  s1_treatment_timing,
  s2_treatment_timing,
  s3_treatment_timing,
  s4_treatment_timing
) |>
  mutate(
    timing = factor(
      timing,
      levels = c(1, 2, 3),
      labels = c("Distal", "Proximal", "Post")
    ),
    three_conditions = factor(
      three_conditions,
      levels = c(0, 1, 2),
      labels = c("Explicit", "Implicit", "Control")
    ),
    study = factor(study)
  )

# Long-format version of `treatment`: one copy per dependent variable, each
# with generic `means` / `se` columns and a `dv` label.

# Leader index rows: drop the hc_* columns, keep leader_* as means/se.
treatment1 <- treatment |>
  select(-starts_with("hc")) |>
  rename(
    means = leader_indexmean,
    se = leader_indexse
  ) |>
  mutate(dv = "Leader Index")

# Health care index rows: drop the leader_* columns, keep hc_* as means/se.
treatment2 <- treatment |>
  select(-starts_with("leader")) |>
  rename(
    means = hc_indexmean,
    se = hc_indexse
  ) |>
  mutate(dv = "Health Care Index")

# Stack both and store the dependent-variable label as a factor.
treatmentdouble <- rbind(treatment1, treatment2) |>
  mutate(dv = factor(dv))

#save(treatment, treatmentdouble, file = "Study 3/processed_data/treatment_means.rda")

## Create dataframe with studies 1 & 4

# Row-level data from studies 1 and 4 only, restricted to the three outcomes
# and the timing code, with a label identifying the source experiment.
# study4's hc_index is the renamed support_social_welfare01 column.
studies_1_4 <- rbind(
  mutate(
    select(study1, symrac01, hc_index, leader_index, timing),
    study = "Experiment 1"
  ),
  mutate(
    select(study4, symrac01, hc_index, leader_index, timing),
    study = "Experiment 4"
  )
)

# High/low RR indicator for each study: 1 when symrac01 >= 0.5, otherwise 0
# (NA when symrac01 is missing), stored as a factor.
study1 <- study1 |>
  mutate(high_rr = factor(if_else(symrac01 >= 0.5, 1, 0)))

study2 <- study2 |>
  mutate(high_rr = factor(if_else(symrac01 >= 0.5, 1, 0)))

study3 <- study3 |>
  mutate(high_rr = factor(if_else(symrac01 >= 0.5, 1, 0)))

study4 <- study4 |>
  mutate(high_rr = factor(if_else(symrac01 >= 0.5, 1, 0)))

#save(studies_1_4, file = "Study 3/processed_data/studies_1_4.rda")

#save(study1, study2, study3, study4, file = "Study 3/processed_data/studies.rda")

## Dataframe for high/low RR facet

# Study 1: mean, standard error and row count of each outcome for every
# three_conditions x timing x high_rr combination.
s1_treatment_timingRR <- study1 |>
  summarise(
    study = 1,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing, high_rr)
  )

# Study 2: mean, standard error and row count of each outcome for every
# three_conditions x timing x high_rr combination.
s2_treatment_timingRR <- study2 |>
  summarise(
    study = 2,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing, high_rr)
  )

# Study 3: mean, standard error and row count of each outcome for every
# three_conditions x timing x high_rr combination.
s3_treatment_timingRR <- study3 |>
  summarize(
    study = 3,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing, high_rr)
  )

# Study 4: mean, standard error and row count of each outcome for every
# three_conditions x timing x high_rr combination. hc_index here is the
# column renamed from support_social_welfare01 earlier in the script.
s4_treatment_timingRR <- study4 |>
  summarize(
    study = 4,
    across(
      c(symrac01, hc_index, leader_index),
      list(
        mean = \(v) mean(v, na.rm = TRUE),
        se = \(v) se(v, na.rm = TRUE)
      ),
      .names = "{.col}{.fn}"
    ),
    n = n(),
    .by = c(three_conditions, timing, high_rr)
  )

# Stack the per-study treatment x timing x high_rr summaries and convert the
# numeric codes for timing, condition and study to labelled factors.
treatmentRR <- rbind(
  s1_treatment_timingRR,
  s2_treatment_timingRR,
  s3_treatment_timingRR,
  s4_treatment_timingRR
) |>
  mutate(
    timing = factor(
      timing,
      levels = c(1, 2, 3),
      labels = c("Distal", "Proximal", "Post")
    ),
    three_conditions = factor(
      three_conditions,
      levels = c(0, 1, 2),
      labels = c("Explicit", "Implicit", "Control")
    ),
    study = factor(study)
  )

# Long-format version of `treatmentRR`: one copy per dependent variable, each
# with generic `means` / `se` columns and a `dv` label.

# Leader index rows: drop the hc_* columns, keep leader_* as means/se.
treatment1RR <- treatmentRR |>
  select(-starts_with("hc")) |>
  rename(
    means = leader_indexmean,
    se = leader_indexse
  ) |>
  mutate(dv = "Leader Index")

# Health care index rows: drop the leader_* columns, keep hc_* as means/se.
treatment2RR <- treatmentRR |>
  select(-starts_with("leader")) |>
  rename(
    means = hc_indexmean,
    se = hc_indexse
  ) |>
  mutate(dv = "Health Care Index")

# Stack both and store the dependent-variable label as a factor.
treatmentdoubleRR <- rbind(treatment1RR, treatment2RR) |>
  mutate(dv = factor(dv))

#save(treatmentdoubleRR, file = "Study 3/processed_data/treatment_rr.rda")

